
/*	Table 2 Panels D & E: time-series average of CS correlation between beta and characters & overlap of stocks in top & bottom portfolio	*/

options threads cpucount=8 sortsize=32G bufno=8;
dm log 'clear';

%let mainfolder=D:\Dropbox\Characteristics Factors\RFS_Replication\;

libname mylib "&mainfolder.\Data_Cleaned";

proc datasets library=work nolist kill;
run;
quit;

data ret;
set mylib.crsp;
where year(date)>=1950;
if (dlstcd = 500 | dlstcd <= 584 and dlstcd >= 520) & hexcd = 3 & (dlret = .S | dlret = .T | dlret = .P) then Ddlret = -.55;
else if (dlstcd = 500 | dlstcd <= 584 and dlstcd >= 520) & (hexcd = 1 | hexcd =2 ) & (dlret = .S | dlret = .T | dlret = .P) then Ddlret = -.3;
else if dlret = .S | dlret = .T | dlret = .A | dlret = .P then Ddlret = .;
else DDLRET=DLRET;
if Dlstcd=. or dlstcd=100 THEN ret=rET; 
ELSE ret=ddlret;
date=intnx('month',date,0,'e');
format date yymmddn8.;
me=abs(prc)*shrout/1000;*ME in millions;
prc=abs(prc);
keep permno date me ret prc exchcd shrcd siccd;
Run;

proc sort data=ret nodupkey;
by permno date descending ret;
run;

proc sort data=ret nodupkey;
by permno date;
run;

data lagme(keep=permno date lagme lagprc lagexchcd lagshrcd lagsiccd);
set ret;
date=intnx('month',date,1,'e');
format date yymmddn8.;
rename me=lagme prc=lagprc exchcd=lagexchcd shrcd=lagshrcd siccd=lagsiccd;
run;

proc sort data=lagme nodupkey;
by permno date;
where not missing(permno) and not missing(date) and not missing(lagme);
run;

data ret;
merge lagme(in=in1) ret(keep=permno date ret in=in2);
by permno date;
if in1;
run;

proc sort data=ret nodupkey;
by permno date;
run;

data compu_crsp(drop=date rename=(date1=date));
set mylib.char(keep=permno date fin car roe op ag bm mom gpa mgmt perf quality june_me);
date1=intnx('month',date,1,'e');*move to crsp timing;
format date1 yymmddn8.;
if bm<=0 then bm=.;
run;

%let minmonth=36;
%let minday=45;

data mbeta;
set mylib.beta1_60;
date=intnx('month',date,0,'e');
format date yymmddn8.;
if freq_smb_ff5<&minmonth then beta1_smb_ff5=.; 
if freq_hml<&minmonth then beta1_hml=.; 
if freq_rmw<&minmonth then beta1_rmw=.; 
if freq_cma<&minmonth then beta1_cma=.; 
if freq_ia<&minmonth then beta1_ia=.; 
if freq_roe<&minmonth then beta1_roe=.; 
if freq_pmu<&minmonth then beta1_pmu=.; 
if freq_qmj<&minmonth then beta1_qmj=.;  
if freq_mgmt<&minmonth then beta1_mgmt=.; 
if freq_fin<&minmonth then beta1_fin=.; 
drop beta1_umd beta1_pead beta1_perf freq_umd freq_pead freq_perf;
run;

proc sort data=mbeta nodupkey;
by permno date;
run;

data dbeta(keep=permno date beta1_umd beta1_pead beta1_perf);
set mylib.beta2_3;
date=intnx('month',date,0,'e');
format date yymmddn8.;
if freq_umd<&minday then beta1_umd=.;
if freq_pead<&minday then beta1_pead=.;
if freq_perf<&minday then beta1_perf=.;
run;

proc sort data=dbeta nodupkey;
by permno date;
run;

data beta(drop=date rename=(date1=date));
merge mbeta(in=in1) dbeta(in=in2);
by permno date;
if in1 and in2;
date1=intnx('month',date,1,'e');
format date1 yymmddn8.;
run;

data all;
merge ret(in=in2) compu_crsp(in=in3) beta(in=in4);
by permno date;
if in2 and in3;
run;

data all1;
set all;
where 1<=lagexchcd<=3 and 10<=lagshrcd<=11;
run;

proc sort data=all1;
by date;
run;

*	Table 2 Panel D, signed correlation between characteristics and beta;
%macro corr(beta=,proxy=,start=);

proc corr data=all1(keep=date permno &beta &proxy) outp=a noprint;
where &start<=date<='31DEC2023'd and not missing(&beta) and not missing(&proxy);
by date;
var &beta &proxy;
run;

data a;
set a;
where _type_='CORR' and lower(_name_)="&proxy";
keep &beta date;
run;

proc sort data=a;
by date;
run;

proc means data=a noprint;
var &beta;
output out=b1(drop=_type_ _freq_) mean=corr;
run;

data b1;
set b1;
factor="&beta";
run;

proc sort data=a out=date(keep=date) nodupkey;
by date;
run;

data firstdate;
set date;
if _n_=1;
run;

proc sort data=date;
by descending date;
run;

data lastdate;
set date;
if _n_=1;
run;
*	add date	;
proc sql;
create table b1
as select distinct a.*,b.date as firstdate format=yymmddn8.
from b1 as a,firstdate as b;
quit;

proc sql;
create table b1
as select distinct a.*,b.date as lastdate format=yymmddn8.
from b1 as a,lastdate as b;
quit;

proc append data=b1 base=b force;
run;
quit;

%mend corr;

proc sql;
drop table b;
quit;

data b;
format factor $24.;
format corr 8.4;
format firstdate yymmddn8.;
format lastdate yymmddn8.;
run;

%corr(beta=beta1_smb_ff5,proxy=june_me,start='01JUL1966'd);
%corr(beta=beta1_hml,proxy=bm,start='01JUL1966'd);
%corr(beta=beta1_rmw,proxy=op,start='01JUL1966'd);
%corr(beta=beta1_cma,proxy=ag,start='01JUL1966'd);
%corr(beta=beta1_ia,proxy=ag,start='01JAN1970'd);
%corr(beta=beta1_roe,proxy=roe,start='01JAN1970'd);
%corr(beta=beta1_pmu,proxy=gpa,start='01JUL1966'd);
%corr(beta=beta1_qmj,proxy=quality,start='01JUL1966'd);
%corr(beta=beta1_umd,proxy=mom,start='01JUL1966'd);
%corr(beta=beta1_mgmt,proxy=mgmt,start='01JUL1966'd);
%corr(beta=beta1_perf,proxy=perf,start='01JUL1966'd);
%corr(beta=beta1_fin,proxy=fin,start='01JUL1975'd);
%corr(beta=beta1_pead,proxy=car,start='01JUL1975'd);
*	signed corr, take the opposite of negative ret-characteristics relation variables (smb, cma, ia, mgmt, perf, fin) ;
data b;
set b(where=(not missing(factor)));
if factor='beta1_smb_ff5' then corr=-corr;
if factor='beta1_cma' then corr=-corr;
if factor='beta1_ia' then corr=-corr;
if factor='beta1_mgmt' then corr=-corr;
if factor='beta1_perf' then corr=-corr;
if factor='beta1_fin' then corr=-corr;
run;
*	avg;
proc means data=b noprint;
var corr;
output out=b_1(drop=_type_ _freq_) mean=;
run;

data b_1;
set b_1;
factor='avg';
run;
*	avg_nosmb;
proc means data=b noprint;
where not(factor='beta1_smb_ff5');
var corr;
output out=b_2(drop=_type_ _freq_) mean=;
run;

data b_2;
set b_2;
factor='avg_nosmb';
run;

proc append data=b_1 base=b force;run;
proc append data=b_2 base=b force;run;

proc export data=b outfile="&mainfolder.\Result\Table2_PanelD.csv" replace;run;
******************************************************************************************;

*	Panel E: % of overlap stocks in top and bottom decile portfolios;
*	take the opposite of negative ret-characteristics relation variables (smb, cma, ia, mgmt, perf, fin);
data all2;
set all1;
june_me_inverse=-june_me;
ag_inverse=-ag;
mgmt_inverse=-mgmt;
perf_inverse=-perf;
fin_inverse=-fin;
run;

%macro corr1(beta=,proxy=,start=);

proc sql;
drop table port1,port1_1,port1_2,port2_1,port2_2,port2,lowport,nlowport_both,nlowport_id1,nlowport_id2,nlowport_total,
highport,nhighport_both,nhighport_id1,nhighport_id2,nhighport_total,nlowport,nhighport,nbothport,nbothport1,date,firstdate,lastdate,b1;
quit;

data port1;
set all2;
where &start<=date<='31DEC2023'd and not missing(&beta);
keep date permno &beta;
run;

proc rank data=port1 out=port1 group=10 ties=dense;
by date;
var &beta;
ranks betagroup;
run;

proc sort data=port1 out=port1_1(keep=date permno) nodupkey;
by date permno;
where betagroup=0;
run;

proc sort data=port1 out=port1_2(keep=date permno) nodupkey;
by date permno;
where betagroup=9;
run;

data port2;
set all2;
where &start<=date<='31DEC2023'd and not missing(&proxy);
keep date permno &proxy;
run;

proc rank data=port2 out=port2 group=10 ties=dense;
by date;
var &proxy;
ranks chargroup;
run;

proc sort data=port2 out=port2_1(keep=date permno) nodupkey;
by date permno;
where chargroup=0;
run;

proc sort data=port2 out=port2_2(keep=date permno) nodupkey;
by date permno;
where chargroup=9;
run;
*	bottom decile;
data lowport;
merge port1_1(in=in1) port2_1(in=in2);
by date permno;
if in1 or in2;
if in1 then id1=1;
else id1=0;
if in2 then id2=1;
else id2=0;
run;

proc sort data=lowport nodupkey;
by date permno;
run;

proc means data=lowport noprint;
where id1=1 and id2=1;
by date;
var id1;
output out=nlowport_both(drop=_type_ _Freq_) n=nboth;
run;

proc means data=lowport noprint;
by date;
var permno;
output out=nlowport_total(drop=_type_ _Freq_) n=ntotal;
run;

data nlowport(keep=date lowpct_total);
merge nlowport_both nlowport_total;
by date;
lowpct_total=nboth/ntotal;
run;
*	top decile;
data highport;
merge port1_2(in=in1) port2_2(in=in2);
by date permno;
if in1 or in2;
if in1 then id1=1;
else id1=0;
if in2 then id2=1;
else id2=0;
run;

proc sort data=highport nodupkey;
by date permno;
run;

proc means data=highport noprint;
where id1=1 and id2=1;
by date;
var id1;
output out=nhighport_both(drop=_type_ _Freq_) n=nboth;
run;

proc means data=highport noprint;
by date;
var permno;
output out=nhighport_total(drop=_type_ _Freq_) n=ntotal;
run;

data nhighport(keep=date highpct_total);
merge nhighport_both nhighport_total;
by date;
highpct_total=nboth/ntotal;
run;

data nbothport;
merge nlowport nhighport;
by date;
run;

proc means data=nbothport noprint;
var lowpct_total highpct_total;
output out=nbothport1(drop=_type_ _Freq_) mean=;
run;

data b1;
set nbothport1;
pct_total=(lowpct_total+highpct_total)*100/2;*in %;
factor="&beta";
run;

proc sort data=nbothport out=date(keep=date) nodupkey;
by date;
run;

data firstdate;
set date;
if _n_=1;
run;

proc sort data=date;
by descending date;
run;

data lastdate;
set date;
if _n_=1;
run;
*	add date	;
proc sql;
create table b1
as select distinct a.*,b.date as firstdate format=yymmddn8.
from b1 as a,firstdate as b;
quit;

proc sql;
create table b1
as select distinct a.*,b.date as lastdate format=yymmddn8.
from b1 as a,lastdate as b;
quit;

proc append data=b1 base=b force;
run;
quit;

%mend corr1;

proc sql;
drop table b;
quit;

data b;
format factor $24.;
format pct_total 8.4;
format firstdate yymmddn8.;
format lastdate yymmddn8.;
run;

%corr1(beta=beta1_smb_ff5,proxy=june_me_inverse,start='01JUL1966'd);
%corr1(beta=beta1_hml,proxy=bm,start='01JUL1966'd);
%corr1(beta=beta1_rmw,proxy=op,start='01JUL1966'd);
%corr1(beta=beta1_cma,proxy=ag_inverse,start='01JUL1966'd);
%corr1(beta=beta1_ia,proxy=ag_inverse,start='01JAN1970'd);
%corr1(beta=beta1_roe,proxy=roe,start='01JAN1970'd);
%corr1(beta=beta1_pmu,proxy=gpa,start='01JUL1966'd);
%corr1(beta=beta1_qmj,proxy=quality,start='01JUL1966'd);
%corr1(beta=beta1_umd,proxy=mom,start='01JUL1966'd);
%corr1(beta=beta1_mgmt,proxy=mgmt_inverse,start='01JUL1966'd);
%corr1(beta=beta1_perf,proxy=perf_inverse,start='01JUL1966'd);
%corr1(beta=beta1_fin,proxy=fin_inverse,start='01JUL1975'd);
%corr1(beta=beta1_pead,proxy=car,start='01JUL1975'd);

data b;
set b(where=(not missing(factor)));
run;
*	avg;
proc means data=b noprint;
var pct_total;
output out=b_1(drop=_type_ _freq_) mean=;
run;

data b_1;
set b_1;
factor='avg';
run;
*	avg_nosmb;
proc means data=b noprint;
where not(factor='beta1_smb_ff5');
var pct_total;
output out=b_2(drop=_type_ _freq_) mean=;
run;

data b_2;
set b_2;
factor='avg_nosmb';
run;

proc append data=b_1 base=b force;run;
proc append data=b_2 base=b force;run;

proc export data=b outfile="&mainfolder.\Result\Table2_PanelE.csv" replace;run;
